// Floating-point element type used for the kernel's buffers and accumulator.
#define PRECISION double

// PRECISION is double, so a double-precision extension has to be enabled.
// Use the AMD-specific extension when the cl_amd_fp64 macro is defined,
// otherwise fall back to the Khronos cl_khr_fp64 extension.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#else
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif



/*
 * Matrix multiplication kernel: each work-item computes one element of
 * output = input1 * input2, with all three buffers addressed as flat,
 * row-major arrays.
 *
 * Work-item mapping (2-D NDRange):
 *   get_global_id(0) -> row index of the output element
 *   get_global_id(1) -> column index of the output element
 *
 * Parameters:
 *   input1 - left operand; element (r, k) is read from input1[r * cols + k]
 *   input2 - right operand; element (k, c) is read from input2[k * cols2 + c]
 *   output - result; element (r, c) is written to output[r * cols2 + c]
 *   cols   - row stride (elements per row) of input1
 *   cols2  - row stride (elements per row) of input2 and of output
 *   l      - number of terms in each dot product (shared inner dimension)
 *
 * Work-items whose column index is >= cols2 return without touching memory,
 * so dimension 1 of the global size may be rounded up past cols2.
 * NOTE(review): the row count is not passed to the kernel, so rows cannot be
 * bounds-checked here; dimension 0 of the global size must not exceed the
 * number of rows of input1/output.
 */
__kernel void OpenCLTemplateKernel(__global const PRECISION *input1, __global const PRECISION *input2, __global PRECISION *output, const int cols , const int cols2 ,const int l)
{
	const size_t row = get_global_id(0);
	const size_t col = get_global_id(1);

	/* A padded work-item would otherwise read the wrong elements and
	 * write into the following row, racing with its real owner. */
	if (col >= (size_t)cols2)
	{
		return;
	}

	/* Index arithmetic is done in size_t so that large buffers do not
	 * wrap around at 32 bits. */
	const size_t stride2 = (size_t)cols2;
	const size_t rowBase = row * (size_t)cols;

	PRECISION sum = 0;
	for (int k = 0; k < l; k++)
	{
		sum += input1[rowBase + (size_t)k] * input2[(size_t)k * stride2 + col];
	}

	output[row * stride2 + col] = sum;
}